# Generate cross-validated regression statistics for family income modelled on
# SAT total score (FAMILY_INCOME ~ RSAT_TOTAL_SCORE) for Tables 2 and S1

library(caret)
library(tidyverse)

# Merged sample: 10-fold cross-validated linear regression of FAMILY_INCOME on
# RSAT_TOTAL_SCORE.

# Keep only rows with a recorded FAMILY_INCOME greater than 10000. The NA check
# is part of the same mask because `NA > 10000` is NA, and indexing rows with NA
# inserts all-NA placeholder rows that would need a second pass to remove.
merged_df <- read.csv("merged_final.csv")
merged_df <- merged_df[!is.na(merged_df$FAMILY_INCOME) &
                         merged_df$FAMILY_INCOME > 10000, ]

# Fix the seed so the random assignment of rows to CV folds is reproducible.
set.seed(1993)
merged_folds <- trainControl(method = "cv", number = 10)

merged_mod <- train(FAMILY_INCOME ~ RSAT_TOTAL_SCORE, method = "lm",
                    data = merged_df, trControl = merged_folds)

# Print both results explicitly: a bare top-level expression is not
# auto-printed when the script is run through source(), so the model summary
# would otherwise be silently dropped.
print(merged_mod)
print(summary(merged_mod))

################################################################################

# Creative sample: 10-fold cross-validated linear regression of FAMILY_INCOME
# on RSAT_TOTAL_SCORE.

# Keep only rows with a recorded FAMILY_INCOME greater than 10000. The NA check
# is part of the same mask because `NA > 10000` is NA, and indexing rows with NA
# inserts all-NA placeholder rows that would need a second pass to remove.
creativ_df <- read.csv("creative_final.csv")
creativ_df <- creativ_df[!is.na(creativ_df$FAMILY_INCOME) &
                           creativ_df$FAMILY_INCOME > 10000, ]

# Fix the seed so the random assignment of rows to CV folds is reproducible.
set.seed(1993)
creativ_folds <- trainControl(method = "cv", number = 10)

creativ_mod <- train(FAMILY_INCOME ~ RSAT_TOTAL_SCORE, method = "lm",
                     data = creativ_df, trControl = creativ_folds)

# Print both results explicitly: a bare top-level expression is not
# auto-printed when the script is run through source(), so the model summary
# would otherwise be silently dropped.
print(creativ_mod)
print(summary(creativ_mod))

################################################################################

# Signif sample: 10-fold cross-validated linear regression of FAMILY_INCOME on
# RSAT_TOTAL_SCORE.

# Keep only rows with a recorded FAMILY_INCOME greater than 10000. The NA check
# is part of the same mask because `NA > 10000` is NA, and indexing rows with NA
# inserts all-NA placeholder rows that would need a second pass to remove.
signif_df <- read.csv("signif_final.csv")
signif_df <- signif_df[!is.na(signif_df$FAMILY_INCOME) &
                         signif_df$FAMILY_INCOME > 10000, ]

# Fix the seed so the random assignment of rows to CV folds is reproducible.
set.seed(1993)
signif_folds <- trainControl(method = "cv", number = 10)

signif_mod <- train(FAMILY_INCOME ~ RSAT_TOTAL_SCORE, method = "lm",
                    data = signif_df, trControl = signif_folds)

# Print both results explicitly: a bare top-level expression is not
# auto-printed when the script is run through source(), so the model summary
# would otherwise be silently dropped.
print(signif_mod)
print(summary(signif_mod))
